# coding: utf-8

import os
from pyspark import *
from operator import *
# "from ... import" must start from a package or a .py module, not from a bare directory
from package.defs_4 import *

if __name__ == '__main__':
    # Local-mode Spark job: join per-student score records against a small
    # student roster shipped to workers as a broadcast variable.
    conf = SparkConf().setAppName("8_rdd_broadcast") \
        .setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # localhost_path='file://'+os.path.dirname(os.path.dirname(os.getcwd()))+'/data/input/SogouQ.txt'
    # localhost_path="hdfs://hadoop3cluster/updown/input/SogouQ.txt"

    # Roster rows: (student_id, name, age)
    stu_info_list = [(1, '张大仙', 11),
                     (2, '王晓晓', 13),
                     (3, '张甜甜', 11),
                     (4, '王大力', 11)]
    # Precompute an id -> name dict once on the driver, then broadcast it:
    # one copy per executor process instead of one copy per task/partition,
    # reducing memory usage and network IO. The dict also turns the per-record
    # lookup in func_map from an O(n) list scan into an O(1) hash lookup.
    id_to_name = {sid: name for sid, name, _age in stu_info_list}
    broadcast_names = sc.broadcast(id_to_name)
    score_info_rdd = sc.parallelize([
        (1, '语文', 99),
        (2, '数学', 99),
        (3, '英语', 99),
        (4, '编程', 99),
        (1, '语文', 99),
        (2, '编程', 99),
        (3, '语文', 99),
        (4, '英语', 99),
        (1, '语文', 99),
        (3, '英语', 99),
        (2, '编程', 99)
    ])

    # TODO requirement: replace the numeric student id in each score record
    # with the corresponding student name from the broadcast roster.
    def func_map(data: tuple) -> tuple:
        """Return (name, subject, score) for a (student_id, subject, score)
        record; records whose id is not in the roster pass through unchanged.
        """
        name = broadcast_names.value.get(data[0])
        if name is None:
            return data
        return (name, data[1], data[2])

    result = score_info_rdd.map(func_map)
    print(result.collect())

    # Release the SparkContext's resources (executors, UI, temp dirs).
    sc.stop()















